1. /* slmllfft.cpp by K.Tsuru */
  2. // function ID = 217 DRADIX, BRADIX
  3. /**************************************************************
  4. SLong class
  5. See also "sfft.h".
  6. Provides a fast Fourie transform(FFT) multiplication.
  7. z = x*y
  8. It uses real discrete transform(RDFT).
  9. [merits]
  10. The work area becomes half.
  11. Version 2.10
  12. Ooura's program is introduced.
  13. rdft version is introduced
  14. July, 2001
  15. ****************************************************************/
  16. #ifndef SN_H
  17. #include "sn.h"
  18. #endif
  19. static uint fftSize = 0;
  20. static FFTWorkArea *F;
  21. static NCBlock<fftType> Wx, Wy; //For x and y.
  22. void rdft(int n, int isgn, fftType *a, int *ip, fftType *w);
  23. /*************** member functions of FFTWorkArea *************************************/
  24. /*
  25. struct FFTWorkArea{
  26. int size, ipsz;
  27. int* ip; // for bit reverse table
  28. double* w; // for sine/cosine table
  29. //constructor
  30. FFTWorkArea():size(0),ipsz(0),ip(NULL),w(NULL){}
  31. void Allocate(int N); // 217
  32. void MemFree(); // 217
  33. ~FFTWorkArea(){}
  34. };
  35. */
  36. #if USES_SIN_COS_TABLE
  37. uint FFTsinSize(){ // table size of sine.
  38. if(fftSize==0) return 0;
  39. return (uint)FFTSineTableSize();
  40. }
  41. #else
  42. uint FFTsinSize(){ // table size of sine.
  43. if(fftSize==0) return 0;
  44. int i, s = 0;
  45. for(i=0;i<=FFT_MAX_SIZE_BITS;i++) s += F[i].size;
  46. return (uint)s;
  47. }
  48. #endif
  49. void FFTWorkArea::Allocate(int N){ // N is FFT size which is power of 2.
  50. ipsz = 2+(1<<(int)(log(N/2+0.5)/log(2.0))/2);
  51. ip = new int[ipsz+2];
  52. ip[0]=0;
  53. size=N/2+2;
  54. w = new fftType[size];
  55. }
  56. void FFTWorkArea::MemFree(){
  57. delete[] ip; delete[] w; ip=NULL; w=NULL;
  58. size = ipsz = 0;
  59. }
  60. /*************************************************************************************/
  61. uint FFTSize(){ return fftSize; }
  62. uint FFTWorkSize(){ // size of work area
  63. if(fftSize==0) return 0;
  64. return FFTsinSize()+Wx.size()+Wy.size();
  65. }
  66. // in the case x != y
  67. static void rdftProduct(const fType* xv, uint hx, const fType* yv, uint hy,
  68. fftType* X, fftType* Y, uint N, int *ip, fftType *w){
  69. // The memory of *X and *Y must be prepared in call routine.
  70. uint i, i2;
  71. // step1 initialization
  72. for(i = 0; i <= hx ; i++) X[i] = xv[i];
  73. while(i<N){ X[i] = 0.0; i++; }
  74. for(i = 0; i <= hy ; i++) Y[i] = yv[i];
  75. while(i<N){ Y[i] = 0.0; i++; }
  76. // step 2 Fourie transform
  77. rdft(N, 1, X, ip, w);
  78. rdft(N, 1, Y, ip, w);
  79. // step 3 calculation of products
  80. fftType xr, xi, yr, yi;
  81. X[0] = X[0]*Y[0];
  82. X[1] = X[1]*Y[1];
  83. for(i = 1; i < N/2; i++){
  84. i2 = 2*i;
  85. xr = X[i2];
  86. xi = X[i2+1];
  87. yr = Y[i2];
  88. yi = Y[i2+1];
  89. X[i2] = xr*yr - xi*yi;
  90. X[i2+1] = xr*yi + xi*yr;
  91. }
  92. }
  93. // in the case x == y
  94. static void rdftSquare(const fType* xv, uint hx, fftType* X, uint N, int *ip, fftType *w){
  95. uint i, i2;
  96. // step1 initialization
  97. for(i = 0; i <= hx ; i++) X[i] = xv[i];
  98. while(i<N){ X[i] = 0.0; i++; }
  99. // step 2 Fourie transform
  100. rdft(N, 1, X, ip, w);
  101. // step 3 calculation of products
  102. fftType xr, xi;
  103. X[0] = X[0]*X[0];
  104. X[1] = X[1]*X[1];
  105. for(i = 1; i < N/2; i++){
  106. i2 = 2*i;
  107. xr = X[i2];
  108. xi = X[i2+1];
  109. X[i2] = xr*xr - xi*xi;
  110. X[i2+1] = 2.0*xr*xi;
  111. }
  112. }
  113. // select the normalization method
  114. #ifdef _WIN64 // 64bit
  115. #define USES_llongNormalize 1
  116. #else
  117. #define USES_llongNormalize 0
  118. #endif
  119. #if USES_llongNormalize == 0
  120. //When double > LONG_MAX the substitution "long = double" is not allowed.
  121. static fType fftNormalize(fftType x, const fType radix, fftType *carry) {
  122. fftType r;
  123. x += ROUND_DBL_INT;
  124. r = fftFmod(x, (fftType)radix); // x = 123999456.999687001
  125. //cout << "x = "<< x << " r = "<< r << endl; // r = 9456.9996870011...
  126. *carry = (x-r)/radix; // carry = 12399.94569999, r = 9457.24968795478344
  127. // carry = 12399 f = 9457
  128. return (fType)(r + ROUND_DBL_INT); // to integer
  129. }
  130. #endif
  131. #if USES_LONG_DBL_FFT
  132. fftType fftModfl(const fftType x, fftType *ip){// modfl() my version
  133. long long int ipl = x;
  134. *ip = ipl;
  135. return x-ipl;
  136. }
  137. #endif
  138. ////////////////////// main body /////////////////////////////
  139. /// z = x*y ///
  140. void SLong::LLMultFFT(const SLong& x, const SLong& y, SLong& z, uint N ){
  141. fftType *X, *Y, *w;
  142. int tblNo = howpow2(N), *ip;
  143. if(fftSize==0) F=new FFTWorkArea[FFT_MAX_SIZE_BITS+1];
  144. if(F[tblNo].size==0) F[tblNo].Allocate((int)N);
  145. fftSize = N;
  146. w = F[tblNo].w; ip = F[tblNo].ip;
  147. if(Wx.size() == 0) z.fftUsedTimes = 0;
  148. if(Wx.size() < N){
  149. Wx.size(N, -1); // enlarge only
  150. Wy.size(N, -1);
  151. }
  152. X = Wx.Elements(); Y = Wy.Elements();
  153. uint i;
  154. uint hx = x.Head(), hy = y.Head();
  155. #ifndef NDEBUG
  156. assert(N <= z.FFTMaxArraySize());
  157. assert(x.Type() & x.DEC_INT);
  158. #endif
  159. if(hx+hy+2u > N){
  160. x.SetError(x.SYNTAX_ERR, "LLMultFFT", 217);
  161. }
  162. const fType* xv = x.ReadFigures();
  163. const fType* yv = y.ReadFigures();
  164. // step 1,2,3 initialization, rdft() and make products.
  165. if(&x == &y)rdftSquare(xv, hx, X, N, ip, w);
  166. else rdftProduct(xv, hx, yv, hy, X, Y, N, ip, w);
  167. // step 4 inverse transform and division by N/2
  168. rdft(N, -1, X, ip, w);
  169. fftType recN2 = 2.0/(fftType)N;
  170. for(i = 0; i < N; i++) X[i] *= recN2;
  171. //It checks that X[n] are very close to integer or not.
  172. if(SNManager::FFTVerify() == ON){
  173. fftType ipart;
  174. const fftType dmax=(fftType)fftLdexp(1.0, fftType_MANT_DIG); // 1.0*2^DBL_MANT_DIG(=53)
  175. const double err = ROUND_DBL_INT;
  176. for(i = 0; i < N; i++){
  177. #if USES_LONG_DBL_FFT
  178. fftModfl(X[i]+ROUND_DBL_INT, &ipart);
  179. #else
  180. fftModf(X[i]+ROUND_DBL_INT, &ipart);
  181. #endif
  182. if((fftFabs(ipart - X[i]) >= err) || (ipart>dmax)){
  183. z.SetError(z.FFT_ERR, "round off", 217);
  184. }
  185. }
  186. }
  187. //step 5 normalization in double and conversion to "fType" type
  188. uint zh = x.aHead + y.aHead +1u, zt = x.aTail + y.aTail;
  189. //It allocates the memory.
  190. if(z.figure.size() < N){
  191. z.valloc(N, -1);
  192. }
  193. z.figure.clear(zh); //The part which is not substituted here is filled with zeros.
  194. fType* zv = z.figure.Elements();
  195. //It cannot obtain a faster speed by treating the BRADIX as another processing.
  196. #if USES_llongNormalize //defined _WIN64 // since version 2.30
  197. // 64 bit integer
  198. long long int t, rdx = x.Radix();;
  199. for(i = 0; i < zh ; i++){
  200. X[i] += ROUND_DBL_INT; //abc.99999 ==> abC.0???, ROUND_DBL_INT = 0.25
  201. if(X[i] < rdx){
  202. zv[i] = (fType)X[i];
  203. continue;
  204. }
  205. t = X[i]; // double --> long long integer
  206. zv[i] = t % rdx; // remainder
  207. X[i+1] += t / rdx; // carry
  208. }
  209. #else
  210. fftType carry;
  211. for(i = 0; i < zh ; i++){
  212. zv[i] = fftNormalize(X[i], x.Radix(), &carry);
  213. X[i+1] += carry;
  214. }
  215. #endif
  216. #ifndef NDEBUG
  217. z.figure[i]; assert(z(i+1u) == 0);
  218. #endif
  219. // last figure By the normalization it is possible that X[2*i] != 0.
  220. zv[i] = fType(X[i] + ROUND_DBL_INT);
  221. z.SetSign( x.Sign()*y.Sign() );
  222. // step 6 It decides the figure positions.
  223. while(!zv[zh]) zh--;
  224. z.aHead = zh;
  225. while(!zv[zt]) zt++;
  226. z.aTail = zt;
  227. #ifndef NDEBUG
  228. assert(zt <= zh);
  229. #endif
  230. z.fftUsedTimes++;
  231. }

slmllfft.cpp : last modifiled at 2017/09/21 10:34:52(6,936 bytes)
created at 2017/10/07 10:26:48
The creation time of this html file is 2017/11/09 14:52:03 (Thu Nov 09 14:52:03 2017).